knitr::opts_chunk$set(
  cache = TRUE,
  autodep = TRUE,
  cache.lazy = FALSE,
  cache.comments = FALSE
)
options(scipen = 999)
cbpalette <- multi.utils::cbpalette()
library(tidyverse)
rstan::rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())

source("functions.R")

Create true values dataframe:

true_values <- data.frame(
  parameter = c(
    "beta_prev",
    "log_sigma_phi_prev",
    "beta_anc",
    "log_sigma_b_anc",
    "beta_art",
    "log_sigma_phi_art"
  ),
  true_value = c(
    -2.4,
    log(sqrt(1 / 4)),
    -0.2,
    log(sqrt(1/ 100)),
    0.7,
    log(0.35)
  )
)

Read in results created in reports:

# model0 <- readRDS("depends/results_model0.rds")
model1 <- readRDS("depends/results_model1.rds")
model2 <- readRDS("depends/results_model2.rds")
model3 <- readRDS("depends/results_model3.rds")
model4 <- readRDS("depends/results_model4.rds")

Boxplots

# draw_boxplots(model0) + labs(title = "Model 0")
# draw_boxplots(model1) + labs(title = "Model 1")
# draw_boxplots(model2) + labs(title = "Model 2")
# draw_boxplots(model3) + labs(title = "Model 3")
# draw_boxplots(model4) + labs(title = "Model 4")

Scatterplots

# draw_scatterplots(model0) + labs(title = "Model 0")
draw_scatterplots(model1) + labs(title = "Model 1")
## Warning: Removed 300 rows containing missing values (geom_point).

draw_scatterplots(model2) + labs(title = "Model 2")
## Warning: Removed 720 rows containing missing values (geom_point).

draw_scatterplots(model3) + labs(title = "Model 3")
## Warning: Removed 360 rows containing missing values (geom_point).

draw_scatterplots(model4) + labs(title = "Model 4")
## Warning: Removed 1080 rows containing missing values (geom_point).

Kolmogorov KS test plots

Take samples from all distributions, then compute maximum ECDF difference \(D\) (two-sample Kolmogorov–Smirnov test). We assume that tmbstan represents the gold-standard, and compare performance of aghq and TMB via their similarity to this gold-standard. Higher values of \(D\) correspond to distributions which are more different.

# draw_ksplots_D(model0) + labs(title = "Model 0")
draw_ksplots_D(model1) + labs(title = "Model 1")
## Warning: Removed 14 rows containing missing values (geom_point).

draw_ksplots_D(model2) + labs(title = "Model 2")
## Warning: Removed 83 rows containing missing values (geom_point).

draw_ksplots_D(model3) + labs(title = "Model 3")
## Warning: Removed 27 rows containing missing values (geom_point).

draw_ksplots_D(model4) + labs(title = "Model 4")
## Warning: Removed 96 rows containing missing values (geom_point).

We could also assess \(l\), the location of \(D\). Determining if there are patterns in the location of the greatest ECDF difference could present us with useful insights.

# draw_ksplots_D(model0) + labs(title = "Model 0")
draw_ksplots_l(model1) + labs(title = "Model 1")

draw_ksplots_l(model2) + labs(title = "Model 2")

draw_ksplots_l(model3) + labs(title = "Model 3")

draw_ksplots_l(model4) + labs(title = "Model 4")

MCMC diagnostics

When using Markov chain Monte Carlo (MCMC) methods, as we have for tmbstan, it’s important to assess for convergence.

Traceplots

One way to do this is via traceplots which visualise the chains over the number of iterations specified.

# map(model0, "mcmc_traceplots")
map(model1, "mcmc_traceplots")
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

map(model2, "mcmc_traceplots")
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

map(model3, "mcmc_traceplots")
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

map(model4, "mcmc_traceplots")
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

R hat

The \(\hat R\) statistic (“R hat”) can also be used. A value of \(\hat R < 1.1\) is typically sufficient.

# draw_rhatplot(model0) + labs(title = "Model 0")
draw_rhatplot(model1) + labs(title = "Model 1")

draw_rhatplot(model2) + labs(title = "Model 2")

draw_rhatplot(model3) + labs(title = "Model 3")

draw_rhatplot(model4) + labs(title = "Model 4")